from tool import getTerm
# 文法类
class Grammar:
    # 给出文法左部和右部，生成文法对象
    def __init__(self,left,right,pos=0):
        self._left = left;
        self._oriRight = right;
        syms = []
        # 首先需要切割右部，分成若干个终结符和非终结符
        while True:
            # print('cur right is !%s!' % right)
            if right ==None or len(right) == 0:
                break;
            if right[0] == '<':   # 如果是一个非终结符
                endPos = right.find('>');    #找到截止位置
                syms.append(right[:endPos+1]) # 添加这个非终结符
                right = right[endPos+1:]  # 剪断
            else:   # 如果是一个非终结符
                ter = getTerm(right)
                right = right[len(ter):]
                syms.append(ter)
        # 设置右侧项目表
        self._right = syms;
        # 当前的位置为0
        self._pos = pos;

    # 得到左部
    def getLeft(self):
        return self._left;

    # 得到右部
    def getRight(self):
        return self._right;
    # 得到当前方向
    def getCurDire(self):
        # 如果已经可以规约了
        if self._pos == len(self._right):
            return None
        return self._right[self._pos]
    # 是否可以规约
    def isReg(self):
        return self.getCurDire() == None
    # 前进一格
    def forward(self):
        if self._pos < len(self._right):
            self._pos += 1;
    # 转换成字符串
    def __str__(self):
        lastFlag = True
        res = str(self._left) + '->'
        for i in range(len(self._right)):
            
            if i == self._pos:
                res += '!'
                lastFlag = False;
            res += self._right[i];
        if lastFlag:
            res += '!'
        return res;
    # 获得一个数据拷贝
    def copy(self):
        return Grammar(self._left,self._oriRight,self._pos);
    
    def __hash__(self):
        return hash(str(self))
 
    def __eq__(self, other):
        if self._left == other._left and self._oriRight == other._oriRight and self._pos == other._pos:
            return True
        return False
    # 得到位置为0的原始文法
    def getOriGram(self):
        return Grammar(self._left,self._oriRight,0);
def getGras(filename = 'gra.txt'):
    # 读取所有文法，返回一个列表与一个 文法-序号映射，左部-文法列表映射
    graList = []
    graOrder = {}
    graMap = {}
    order = 0
    with open(filename,'r') as f:
        for gra in f.readlines():
            # print('处理到%s'%gra)
            curGra = gra.replace('\n','').replace(' ','').replace('<','<').replace('\t','').split('→')
            tempGra = Grammar(curGra[0],curGra[1])
            graList.append(tempGra)
            graOrder[tempGra] = order
            if graMap.get(curGra[0]) == None:
                graMap[curGra[0]] = []
            graMap[curGra[0]].append(tempGra);
            order += 1
    return graList,graOrder,graMap;


# 判断非终结符是否可以为空
# 首先非终结符对应的右部，如果有长度为0的项，可以为空
def isEpsilon(unTerm,graMap):

    # print('进行到%s'%unTerm)
    # 终结符必然不能为空
    if unTerm[0] != '<':
        return False
    # 拿到非终结符为左部的文法列表
    items = graMap[unTerm]
    # if unTerm == '<分程序>':
        # print('符号为%s'%unTerm)
    # 遍历每条文法
    for item in items:
        rights = item.getRight()
        # 如果直接导出空
        if len(rights) == 0:
            # 可空
            return True
        # 查看右边的每一个符号
        for right in rights:
            
            # 防止循环检查
            if right == unTerm:
                continue
            # 遇到终结符必然不可能为空
            if right[0] != '<':
                # return False
                break
            else:
                # 如果这个非终结符不能为空
                if not isEpsilon(right,graMap):
                    # if unTerm == '<分程序>':
                        # print(' %s不可为空'%right)
                    # return False
                    break
        # 每个符号都可以为空
        else:
            # if unTerm == '<分程序>':
                # print('分程序返回空')
            return True
            #print('发现不可为空')
            #continue
        #return True
    return False
# 第一遍，把所有不能为空的都加进去
def firstFirst(gras,firsts,graMap):
    # 遍历所有非终结符
    for gra in gras:
        # 得到左部的非终结符
        unTerm = gra.getLeft();
        # 跳过S
        if unTerm == 'S':
            continue
        # 还没有信息则初始化集合
        if firsts.get(unTerm) == None:
            firsts[unTerm] = set()
        # 得到文法右部符号串
        rights = gra.getRight()
        # 处理epsilon
        if len(rights) == 0:
            firsts[unTerm].add('e')
        if isEpsilon(unTerm,graMap):
            firsts[unTerm].add('e')
        # # 处理第一个遇到的符号，不是非终结符则添加
        # elif rights[0][0] != '<':
        #     firsts[unTerm].add(rights[0])
        # 遍历串
        for right in rights:
            # 加进去
            firsts[unTerm].add(right)
            # 如果不能为空退出
            if not isEpsilon(right,graMap):
                break;
            # # 如果遇到非终结符
            # if r[0] == '<':
            #     # 先不处理
            #     break;
            # else:
            #     # 加入到first集
            #     firsts[unTerm].add(r)
            #     break;
    # return firsts
# 集合里有非终结符
def hasUnTerm(symSet):
    symList = list(symSet)
    for s in symList:
        if s[0] == '<':
            return True
    return False
# 第二遍，把非终结符合并
def firstSecond(unTerms,firsts):
    # 把集合分为first里含有非终结符和没有非终结符 ufSet,fSet两个集合
    ufSet = set()
    fSet = set()

    for unTerm in unTerms:
        if hasUnTerm(firsts[unTerm]):
            ufSet.add(unTerm)
        else:
            fSet.add(unTerm)
    # 当uf集合不为空时进行
    while len(ufSet) > 0:
        # 遍历fSet中的每一个元素f
        fList = list(fSet)
        for f in fList:
            # 遍历ufSet中每一个元素u
            uList = list(ufSet)
            for u in uList:
                # 首先要删除等于自身的项目，防止无限循环
                if u in firsts[u]:
                    firsts[u].remove(u)
                # 如果u对应的first集合包含f
                if f in firsts[u]:
                    # 把f从u的First中删除
                    firsts[u].remove(f)
                    # 是否移除合并后的epsilon
                    noEps = False
                    # 注意对epsilon的处理，e在u中不在f中
                    if 'e' in firsts[f] and (not 'e' in firsts[u]):
                        noEps = True
                    # 把u和f对应的first集合合并
                    firsts[u] |= firsts[f];

                    if noEps:
                        # 移除
                        firsts[u].remove('e')
                    
                # 如果u全为非终结符
                if not hasUnTerm(firsts[u]):
                    # 从ufSet删除u，将u加入fSet
                    ufSet.remove(u)
                    fSet.add(u)
# 右部follow和守门员福利
def followFirst(unTerms,gras,follows,firsts,graMap):
    dbStr = ''
    # 遍历所有非终结符
    for unTerm in unTerms:
        if unTerm == dbStr:
            print('处理%s'%unTerm)
        # 如果没有添加过这个follow
        if follows.get(unTerm) == None:
            # 初始化
            follows[unTerm] = set()   
        # 遍历所有文法
        for gra in gras:
            if unTerm == dbStr:
                print('当前文法为%s'%str(gra))
            left = gra.getLeft()
            rights = gra.getRight()
            # 加入first集合标志
            add = False
            # 遍历文法右部符号
            for right in rights:
                if unTerm == dbStr:
                    print(right)
                # 需要加入了
                if add:
                    if unTerm == dbStr:
                        print('加入%s'%right)
                    # 非终结符取并集
                    if right[0] == '<':

                        # 加到集合去
                        follows[unTerm] |= firsts[right]
                    # 终结符添加
                    else:
                        follows[unTerm].add(right)
                # 如果发现了unTerm
                if right == unTerm:
                    if unTerm == dbStr:
                        print('发现%s'%right)
                    # 需要将后面的都first加入follow
                    add = True
                # 如果发现了障碍，右部follow结束
                if right != unTerm and add and (not isEpsilon(right,graMap)):
                    break;
            else:
                # print(follows[unTerm])
                # 全透风，且应该添加，守门员福利
                if add and left != unTerm:
                    follows[unTerm].add(left);
def followSecond(follows,unTerms):
    # 把集合分为first里含有非终结符和没有非终结符 ufSet,fSet两个集合
    ufSet = set()
    fSet = set()

    for unTerm in unTerms:
        if hasUnTerm(follows[unTerm]):
            ufSet.add(unTerm)
        else:
            fSet.add(unTerm)
    # 遍历fSet中的每一个元素f
    fList = list(fSet)
    for f in fList:
        # 遍历ufSet中每一个元素u
        uList = list(ufSet)
        for u in uList:
            # 首先要删除等于自身的项目，防止无限循环
            if u in follows[u]:
                follows[u].remove(u)
            # 如果u对应的follow集合包含f
            if f in follows[u]:
                # 把f从u的First中删除
                follows[u].remove(f)
                # 把u和f对应的first集合合并
                follows[u] |= follows[f];
                
                    
            # 如果u全为非终结符
            if not hasUnTerm(follows[u]):
                # 从ufSet删除u，将u加入fSet
                ufSet.remove(u)
                fSet.add(u) 
    # 代入相消
    uList = list(ufSet)
    # 遍历每一个含有非终结符的follow
    for unTerm in uList:
        # 遍历非终结符的每一个元素
        for u in ufSet:
            # 如果u的follow集包含unTerm
            if unTerm in follows[u]:
                # 把u的Follow集与unTerm的follow集合并，去除和左部相同的元素
                follows[u] |= follows[unTerm]
                if u in follows[u]:
                    follows[u].remove(u);
                if unTerm in follows[u]:
                    follows[u].remove(unTerm);
    # 移除epsilon
    for unTerm in unTerms:
        if 'e' in follows[unTerm]:
            follows[unTerm].remove('e')
        if 'S' in follows[unTerm]:
            follows[unTerm].remove('S')


    # 当uf集合不为空时进行

        # 遍历
    # # 当uf集合不为空时进行
    # while len(ufSet) > 0:
    #     # 遍历fSet中的每一个元素f
    #     fList = list(fSet)
    #     for f in fList:
    #         # 遍历ufSet中每一个元素u
    #         uList = list(ufSet)
    #         for u in uList:
    #             # 首先要删除等于自身的项目，防止无限循环
    #             if u in follows[u]:
    #                 follows[u].remove(u)
    #             # 如果u对应的follow集合包含f
    #             if f in follows[u]:
    #                 # 把f从u的First中删除
    #                 follows[u].remove(f)
    #                 # 把u和f对应的first集合合并
    #                 follows[u] |= follows[f];
    #                 # 移除epsilon
    #                 if 'e' in follows[u]:
    #                     follows[u].remove('e')
                    
    #             # 如果u全为非终结符
    #             if not hasUnTerm(follows[u]):
    #                 # 从ufSet删除u，将u加入fSet
    #                 ufSet.remove(u)
    #                 fSet.add(u)    
def getAllSymble(gras):
    title = ['const','var','procedure','begin','end','ood','if','then',
    'while','do','read','call','write','#',':=','=','(',')',';',',','.','id','un','re','mn','td']

    unTerms = set()
    for gra in gras:
        unTerms.add(gra.getLeft())
    unTerms.remove('S')
    title += list(unTerms)
    return title
# 得到First和Follow集合
def firstAndFollow(gras,graMap):

    # 首先计算所有非终结符
    unTerms = set()
    for gra in gras:
        unTerms.add(gra.getLeft())
    # 不需要开始符号
    unTerms.remove('S')

    # 计算所有first
    firsts = {}
    # 把终结符和first(非终结符)加到终结符里面去
    firstFirst(gras,firsts,graMap)
    # 合并非终结符
    firstSecond(unTerms,firsts)
    # first集合计算完成，打印检验
    # for f in firsts.items():
    #     print('%s:%s'%(f[0],f[1]))

    # 计算Follow集
    follows = {}

    # 落井下S
    follows['<程序>'] = {'#'};
    # 右部follow和守门员福利
    followFirst(unTerms,gras,follows,firsts,graMap)
    # 替换非终结符
    followSecond(follows,unTerms)


    # follow集合计算完成，打印检验
    # for f in follows.items():
    #     print('%s:%s'%(f[0],f[1]))

    #pass
    return firsts,follows
if __name__ == '__main__':
    # gra = Grammar('<常量说明部分>','const<常量定义组>;',2) 
    # print(gra.isReg())
    # test = {}
    # test[gra] = 3
    # print(test[gra])
    l,o,m = getGras()
    #print(getAllSymble(l))
    # for g in l:
    #     print('%s %s'%(g,o[g]))
    # for i in m.items():
    #     print('%s %s'%(i[0],str(i[1])))
    # # print(l)
    # f,f1 = firstAndFollow(l,m)
    # # print(f1)
    # for g in l:
    #     un = g.getLeft()
    #     print(('%s:%s')%(un,isEpsilon(un,m)))